{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6e27703e",
   "metadata": {},
   "source": [
    "# Evidently Data Drift Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c78ba61",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.datasets import fetch_openml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "922df2f0",
   "metadata": {},
   "source": [
    "## Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c19c6681",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load \"adult\" dataset\n",
    "data = fetch_openml(name='adult', version=2, as_frame='auto')\n",
    "df = data.frame\n",
    "\n",
    "# prepare target and prediction\n",
    "df['target'] = df['education-num']\n",
    "df['prediction'] = df['education-num'].values + np.random.normal(0, 6, df.shape[0])\n",
    "\n",
    "# make reference and current datasets\n",
    "reference_data = df[~df.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
    "current_data = df[df.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
    "current_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c64570ed",
   "metadata": {},
   "source": [
    "## One column data drift"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f02bf13",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "from evidently.metrics import ColumnDriftMetric\n",
    "from evidently.report import Report\n",
    "\n",
    "report = Report(metrics=[ColumnDriftMetric(column_name=\"class\")])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7adc8cc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a992ca1",
   "metadata": {},
   "outputs": [],
   "source": [
    "report = Report(metrics=[ColumnDriftMetric(column_name=\"target\")])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5f56602",
   "metadata": {},
   "outputs": [],
   "source": [
    "report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12f2b02d",
   "metadata": {},
   "source": [
    "## Data Drift for all columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f364320",
   "metadata": {},
   "outputs": [],
   "source": [
    "from evidently.metrics import DataDriftTable\n",
    "\n",
    "\n",
    "report = Report(metrics=[DataDriftTable()])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4abb8447",
   "metadata": {},
   "outputs": [],
   "source": [
    "report.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bcac8862",
   "metadata": {},
   "source": [
    "## Owerall dataset drift"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78a285ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "from evidently.metrics import DatasetDriftMetric\n",
    "\n",
    "report = Report(metrics=[DatasetDriftMetric()])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23867f71",
   "metadata": {},
   "outputs": [],
   "source": [
    "report = Report(metrics=[DatasetDriftMetric(drift_share=0.4)])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6443dedd",
   "metadata": {},
   "source": [
    "## One Column Value Plot Metric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95a0e6e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from evidently.metrics import ColumnValuePlot\n",
    "\n",
    "report = Report(metrics=[ColumnValuePlot(column_name=\"age\")])\n",
    "report.run(current_data=current_data, reference_data=reference_data)\n",
    "report"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b307cbf5",
   "metadata": {},
   "source": [
    "## Data Drift tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98d6ad30",
   "metadata": {},
   "outputs": [],
   "source": [
    "from evidently.test_preset import NoTargetPerformanceTestPreset\n",
    "from evidently.test_suite import TestSuite\n",
    "\n",
    "\n",
    "data_quality_suite = TestSuite(\n",
    "    tests=[\n",
    "        NoTargetPerformanceTestPreset(columns=[\"education-num\", \"hours-per-week\"], stattest=\"psi\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "data_quality_suite.run(current_data=current_data, reference_data=reference_data)\n",
    "data_quality_suite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "641f7003",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
